# 03_political_knowledge_predict_pattern.R
# Created: 2020-1-10 Taka-aki Asano
# Last Modified: 2023-2-19

# package
require("psych")
require("magrittr")
require("nnet")
require("coefplot")
require("effects")


# explanatory variables
voter2019$FEMALE <- ifelse(voter2019$SEX == 2, 1, 0)
voter2019$GENERATION <- NA
voter2019$GENERATION[voter2019$AGE <= 29] <- 1
voter2019$GENERATION[voter2019$AGE >= 30 & voter2019$AGE <= 39] <- 2
voter2019$GENERATION[voter2019$AGE >= 40 & voter2019$AGE <= 49] <- 3
voter2019$GENERATION[voter2019$AGE >= 50 & voter2019$AGE <= 59] <- 4
voter2019$GENERATION[voter2019$AGE >= 60 & voter2019$AGE <= 69] <- 5
voter2019$GENERATION[voter2019$AGE >= 70] <- 6
voter2019$EDUCATION[voter2019$EDUCATION == 5] <- NA
voter2019$INTEREST_RE <- recode(voter2019$INTEREST, `1` = 5, `2` = 4, `3` = 3, `4` = 2, `5` = 1)


# descriptive statistics for explanatory variables
describe <- psych::describe(
  voter2019[,c("FEMALE", "GENERATION", "EDUCATION", "INTEREST_RE", "NEWSPAPER", "TV", "ONLINE")], 
  skew = FALSE, ranges = TRUE
)
describe$vars <- NULL; describe$range <- NULL
(describe <- set_rownames(
  describe, 
  c("FEMALE", "Age", "Education Level", "Political Interest", "Newspaper", "TV", "Online News")
  )
)


# multinomial logistic regression
voter2019_mlogit <- dplyr::select(
  voter2019, Knowledge_Class, FEMALE, GENERATION, EDUCATION, 
  INTEREST_RE, NEWSPAPER, TV, ONLINE
)
voter2019_mlogit <- na.omit(voter2019_mlogit)
multi_logit <- multinom(
  Knowledge_Class ~ FEMALE + GENERATION + EDUCATION + 
    INTEREST_RE + NEWSPAPER + TV + ONLINE, 
  data = voter2019_mlogit, reflevel = "Class1"
)
summary(multi_logit)


# plot coefficients
multi_logit_plot <- broom::tidy(multi_logit, 
                                conf.int = TRUE, 
                                exponentiate = FALSE) %>% 
  dplyr::filter(term != "(Intercept)") %>% 
  mutate(term = recode(term, 
                       `FEMALE` = "Female", 
                       `GENERATION` = "Age",  
                       `EDUCATION` = "Education", 
                       `INTEREST_RE` = "Political Interest", 
                       `NEWSPAPER` = "Newspaper", 
                       `TV` = "TV", 
                       `ONLINE` = "Internet")) %>% 
  mutate(term = factor(term, 
                       levels = rev(c("Female", "Age", "Education", 
                                      "Political Interest", 
                                      "Newspaper", "TV", "Internet")))) %>% 
  ggplot(aes(x = term, y = estimate, 
             ymin = conf.low, ymax = conf.high)) + 
  geom_point() + geom_hline(yintercept = 0, color = "gray", linetype = "dashed") + 
  geom_pointrange() + coord_flip() + facet_wrap(~y.level, ncol = 3) + 
  labs(x = "Explanatory Variables", y = "Estimation", 
       title = "Multinomial Logistic Regression \nBaseline: Class1") + 
  theme(plot.title = element_text(hjust = 0.5))
plot(multi_logit_plot)


# marginal effect
## newspaper
predict_newspaper <- Effect(c("NEWSPAPER"), multi_logit, 
                            xlevels = list(NEWSPAPER = seq(1, 5, 0.01)), 
                            given.values = c(FEMALE = 1, 
                                             GENERATION = mean(voter2019$GENERATION), 
                                             EDUCATION = 4, 
                                             INTEREST_RE = mean(voter2019$INTEREST_RE), 
                                             TV = mean(voter2019$TV), 
                                             ONLINE = mean(voter2019$ONLINE)))
predict_newspaper <- cbind(predict_newspaper$x, 
                           predict_newspaper$prob, 
                           predict_newspaper$se.prob) %>% 
  gather(key = "Class", value = "Rate", -NEWSPAPER)
predict_newspaper$Class <- str_replace_all(predict_newspaper$Class, 
                                           pattern = "se.prob", 
                                           replacement = "SE")
predict_newspaper$Class <- str_replace_all(predict_newspaper$Class, 
                                           pattern = "prob", 
                                           replacement = "Prob")
predict_newspaper <- separate(predict_newspaper, col = Class, sep = "\\.", 
                              into = c("Estimate", "Class"))
predict_newspaper <- spread(predict_newspaper, key = "Estimate", value = "Rate")
predict_newspaper_plot <- ggplot(predict_newspaper, 
                                 aes(x = NEWSPAPER, y = Prob, 
                                     ymax = Prob + 1.96*SE, 
                                     ymin = Prob - 1.96*SE)) + 
  geom_line(aes(color = Class)) + ylim(0, 0.6) + 
  labs(x = "Frequency of Exposure", y = "Probability", title = "(a) Newspaper") + 
  scale_color_brewer(palette = "Set1", 
                     name = "", 
                     labels = c("Class1" = "Class 1", "Class2" = "Class 2", 
                                "Class3" = "Class 3", "Class4" = "Class 4")) + 
  theme(plot.title = element_text(hjust = 0.5))
plot(predict_newspaper_plot)

## TV
predict_tv <- Effect(c("TV"), multi_logit, 
                     xlevels = list(TV = seq(1, 5, 0.01)), 
                     given.values = c(FEMALE = 1, 
                                      GENERATION = mean(voter2019$GENERATION), 
                                      EDUCATION = 4, 
                                      INTEREST_RE = mean(voter2019$INTEREST_RE), 
                                      NEWSPAPER = mean(voter2019$NEWSPAPER), 
                                      ONLINE = mean(voter2019$ONLINE)))
predict_tv <- cbind(predict_tv$x, 
                    predict_tv$prob, 
                    predict_tv$se.prob) %>% 
  gather(key = "Class", value = "Rate", -TV)
predict_tv$Class <- str_replace_all(predict_tv$Class, 
                                    pattern = "se.prob", 
                                    replacement = "SE")
predict_tv$Class <- str_replace_all(predict_tv$Class, 
                                    pattern = "prob", 
                                    replacement = "Prob")
predict_tv <- separate(predict_tv, col = Class, sep = "\\.", 
                       into = c("Estimate", "Class"))
predict_tv <- spread(predict_tv, key = "Estimate", value = "Rate")
predict_tv_plot <- ggplot(predict_tv, 
                          aes(x = TV, y = Prob, 
                              ymax = Prob + 1.96*SE, 
                              ymin = Prob - 1.96*SE)) + 
  geom_line(aes(color = Class)) + ylim(0, 0.6) + 
  labs(x = "Frequency of Exposure", y = "Probability", title = "(b) TV") + 
  scale_color_brewer(palette = "Set1", 
                     name = "", 
                     labels = c("Class1" = "Class 1", "Class2" = "Class 2", 
                                "Class3" = "Class 3", "Class4" = "Class 4")) + 
  theme(plot.title = element_text(hjust = 0.5))
plot(predict_tv_plot)

## Online news
predict_internet <- Effect(c("ONLINE"), multi_logit, 
                           xlevels = list(ONLINE = seq(1, 5, 0.01)), 
                           given.values = c(FEMALE = 1, 
                                            GENERATION = mean(voter2019$GENERATION), 
                                            EDUCATION = 4, 
                                            INTEREST_RE = mean(voter2019$INTEREST_RE), 
                                            NEWSPAPER = mean(voter2019$NEWSPAPER), 
                                            TV = mean(voter2019$TV)))
predict_internet <- cbind(predict_internet$x, 
                          predict_internet$prob, 
                          predict_internet$se.prob) %>% 
  gather(key = "Class", value = "Rate", -ONLINE)
predict_internet$Class <- str_replace_all(predict_internet$Class, 
                                          pattern = "se.prob", 
                                          replacement = "SE")
predict_internet$Class <- str_replace_all(predict_internet$Class, 
                                          pattern = "prob", 
                                          replacement = "Prob")
predict_internet <- separate(predict_internet, col = Class, sep = "\\.", 
                             into = c("Estimate", "Class"))
predict_internet <- spread(predict_internet, key = "Estimate", value = "Rate")
predict_internet_plot <- ggplot(predict_internet, 
                                aes(x = ONLINE, y = Prob, 
                                    ymax = Prob + 1.96*SE, 
                                    ymin = Prob - 1.96*SE)) + 
  geom_line(aes(color = Class)) + ylim(0, 0.6) + 
  labs(x = "Frequency of Exposure", y = "Probability", title = "(c) Online News") + 
  scale_color_brewer(palette = "Set1", 
                     name = "", 
                     labels = c("Class1" = "Class 1", "Class2" = "Class 2", 
                                "Class3" = "Class 3", "Class4" = "Class 4")) + 
  theme(plot.title = element_text(hjust = 0.5))
plot(predict_internet_plot)
